#!/usr/bin/env python

import re
import sys
import os
import os.path
import glob
import subprocess
import collections
from operator import itemgetter

def help():
    """Print the command-line usage text of cmpcodesize to stdout.

    The name shadows the `help` builtin; it is kept because main() calls it
    for the -h option.
    """
    # A single parenthesized argument prints identically under the Python 2
    # print statement and the Python 3 print function.
    print("""\
cmpcodesize [options] <old-files> [--] <new-files>

Compares code sizes of "new" files, taking "old" files as a reference.

Options:
    -a           Show sizes of additional sections
    -c           Show functions by category
    -l           List all functions (can be a very long list)
    -s           Summarize the sizes of multiple files instead of listing each file separately
    -h           Show this help text

Environment variables:
    SWIFT_NEW_BUILDDIR   The new build-dir
E.g. $HOME/swift-work/build/Ninja-ReleaseAssert+stdlib-Release/swift-macosx-x86_64
    SWIFT_OLD_BUILDDIR   The old build-dir
E.g. $HOME/swift-reference/build/Ninja-ReleaseAssert+stdlib-Release/swift-macosx-x86_64

How to specify files:
1) No files:
    Compares codesize of the PerfTests_* executables and the swiftCore dylib in the new and old build-dirs.
    Example:
        cmpcodesize

2) One or more paths relative to the build-dirs (can be a pattern):
    Compares those files in the new and old build-dirs.
    Aliases:
        O          => bin/PerfTests_O
        Ounchecked => bin/PerfTests_Ounchecked
        Onone      => bin/PerfTests_Onone
        dylib      => lib/swift/macosx/x86_64/libswiftCore.dylib
    Examples:
        cmpcodesize Onone
        cmpcodesize benchmark/PerfTestSuite/O/*.o

3) Two files:
    Compares these two files (the first is the old file).
    Example:
        cmpcodesize test.o newversion.o

4) Two lists of files, separated by '--':
    Compares a set of files.
    Example:
        cmpcodesize olddir/*.o -- newdir/*.o

5) One file (only available with the -l option):
    Lists function sizes for that file
    Example:
        cmpcodesize -l test.o
""")

# Symbol-name prefix -> category under which the symbol's size is reported.
Prefixes = dict([
    # Cpp
    ("__Z", "CPP"),
    ("_swift", "CPP"),
    ("__swift", "CPP"),

    # Objective-C
    ("+[", "ObjC"),
    ("-[", "ObjC"),

    # Swift
    ("__TP", "Partial Apply"),
    ("__TTW", "Protocol Witness"),
    ("__Tw", "Value Witness"),
    ("__TM", "Type Metadata"),
    ("__TF", "Swift Function"),
    ("__TTSg", "Generic Spec"),
    ("__TTSf", "FuncSig Spec"),
    ("__TZF", "Static Func"),
    # Function signature specialization of a generic specialization.
    # addFunction() switches to this key when a "__TTSf" symbol also
    # contains the generic specialization prefix.
    ("__TTSGF", "FuncSigGen Spec"),
    ("__TTo", "Swift @objc Func"),
])

# Substring (found anywhere in a symbol name) -> reporting category.
Infixes = dict([
    # Swift
    ("q_", "Generic Function"),
])

# Marker identifying generic specializations in a symbol name.
GenericFunctionPrefix = "__TTSg"

# Keys in sorted order so that lookups iterate deterministically.
SortedPrefixes = sorted(Prefixes.keys())
SortedInfixes = sorted(Infixes.keys())

def addFunction(sizes, function, startAddr, endAddr, groupByPrefix):
    """Add the size of one function to the `sizes` accumulator.

    The size is `endAddr - startAddr`, i.e. the distance between the two
    addresses passed in by the caller.

    Arguments:
        sizes: mapping from name to accumulated size; it must supply a
            default of 0 for missing keys (e.g. collections.defaultdict(int)).
        function: symbol name of the function, or None/empty if there is
            no current function (the call is then a no-op).
        startAddr, endAddr: integer addresses, or None if no address was
            seen for the function (the call is then a no-op).
        groupByPrefix: if true, the size is added to a category derived from
            the Infixes/Prefixes tables ("Unknown" if nothing matches);
            otherwise it is added under the function's own name.
    """
    # Address 0 is a valid value, so compare against None explicitly.
    if not function or startAddr is None or endAddr is None:
        return

    size = endAddr - startAddr

    if not groupByPrefix:
        sizes[function] += size
        return

    # Infix categories only apply to symbols which are not generic
    # specializations.
    if GenericFunctionPrefix not in function:
        for infix in SortedInfixes:
            if infix in function:
                sizes[Infixes[infix]] += size
                return

    for prefix in SortedPrefixes:
        if function.startswith(prefix):
            # Special handling for function signature specializations
            # of generic specializations.
            if prefix == "__TTSf" and GenericFunctionPrefix in function:
                prefix = "__TTSGF"
            sizes[Prefixes[prefix]] += size
            return

    sizes["Unknown"] += size

def flatten(*args):
    """Yield the leaf values of arbitrarily nested iterables, depth-first.

    Every argument which has an `__iter__` attribute is expanded
    recursively; everything else is yielded unchanged. Strings are always
    yielded whole.
    """
    for x in args:
        # Python 3 strings have __iter__ (Python 2 strings do not), and
        # iterating a one-character string yields itself again, so strings
        # must be excluded explicitly to avoid infinite recursion.
        if hasattr(x, '__iter__') and not isinstance(x, (str, bytes)):
            for y in flatten(*x):
                yield y
        else:
            yield x

def _preferredArch(fatHeaderLines):
    """Choose the architecture to inspect from `otool -V -f` output lines.

    Returns "arm64" if any listed architecture name contains "arm64",
    otherwise the first listed architecture, or None if no architecture
    line is present.
    """
    archPattern = re.compile(r'architecture ([\S]+)')
    firstArch = None
    for line in fatHeaderLines:
        archMatch = archPattern.match(line)
        if not archMatch:
            continue
        name = archMatch.group(1)
        # Test the architecture of *this* line, so that arm64 is preferred
        # regardless of its position in the list.
        if "arm64" in name:
            return "arm64"
        if firstArch is None:
            firstArch = name
    return firstArch


def readSizes(sizes, fileName, functionDetails, groupByPrefix):
    """Accumulate section and function sizes of `fileName` into `sizes`.

    The data is obtained by running `otool` on the file and parsing its
    textual output.

    Arguments:
        sizes: mapping from name to accumulated size; it must supply a
            default of 0 for missing keys (e.g. collections.defaultdict(int)).
        fileName: path of the file to inspect.
        functionDetails: if true, the text sections are also disassembled
            and every function found is recorded through addFunction().
        groupByPrefix: if true, section sizes are recorded under their
            section name and functions are recorded by category; otherwise
            only functions are recorded, under their own names.

    Raises:
        subprocess.CalledProcessError: if otool exits with a non-zero status.
        OSError: if otool cannot be executed.
    """
    def otoolLines(*options):
        # universal_newlines makes check_output return text on Python 3 as
        # well, so the output can be split on "\n".
        command = ["otool"] + archParams + list(options) + [fileName]
        output = subprocess.check_output(command, universal_newlines=True)
        return output.split("\n")

    # Check if multiple architectures are supported by the object file.
    # Prefer arm64 if available.
    fatHeader = subprocess.check_output(["otool", "-V", "-f", fileName],
                                        universal_newlines=True)
    arch = _preferredArch(fatHeader.split("\n"))
    archParams = ["-arch", arch] if arch is not None else []

    if functionDetails:
        content = otoolLines("-l", "-v", "-t")
        content += otoolLines("-v", "-s", "__TEXT", "__textcoal_nt")
    else:
        content = otoolLines("-l")

    sectName = None
    currFunc = None
    startAddr = None
    endAddr = None

    sectionPattern = re.compile(r' +sectname ([\S]+)')
    sizePattern = re.compile(r' +size ([\da-fx]+)')
    asmlinePattern = re.compile(r'^([0-9a-fA-F]+)\s')
    # A label is either an Objective-C method ("-[Class method]:" or
    # "+[Class method]:", which may contain spaces) or any name without
    # slashes and whitespace, followed by a colon.
    labelPattern = re.compile(r'^(([\-+]*\[[^\]]*\])|[^\/\s]+):$')

    for line in content:
        asmlineMatch = asmlinePattern.match(line)
        if asmlineMatch:
            # A disassembled instruction: track the first and the last
            # address seen since the current label.
            addr = int(asmlineMatch.group(1), 16)
            if startAddr is None:
                startAddr = addr
            endAddr = addr
        elif line == "Section":
            # Start of a new section description; its name follows later.
            sectName = None
        else:
            labelMatch = labelPattern.match(line)
            sizeMatch = sizePattern.match(line)
            sectionMatch = sectionPattern.match(line)
            if labelMatch:
                # A new function starts: record the previous one first.
                addFunction(sizes, currFunc, startAddr, endAddr, groupByPrefix)
                currFunc = labelMatch.group(1)
                startAddr = None
                endAddr = None
            elif sizeMatch and sectName and groupByPrefix:
                sizes[sectName] += int(sizeMatch.group(1), 16)
            elif sectionMatch:
                sectName = sectionMatch.group(1)
                # Coalesced text is counted as part of the text section.
                if sectName == "__textcoal_nt":
                    sectName = "__text"

    # Record the last function of the output.
    addFunction(sizes, currFunc, startAddr, endAddr, groupByPrefix)

def compareSizes(oldSizes, newSizes, nameKey, title):
    """Print one table row comparing the old and new size of `nameKey`.

    Arguments:
        oldSizes, newSizes: mappings from name to size.
        nameKey: the name to look up in both mappings.
        title: text for the first column of the row.

    The last column is (1 - new/old) * 100, so a positive percentage means
    that the new size is smaller. It is "- " if the old size is 0. Nothing
    is printed if either size is None.
    """
    oldSize = oldSizes[nameKey]
    newSize = newSizes[nameKey]
    if oldSize is None or newSize is None:
        return

    if oldSize != 0:
        perc = "%.1f%%" % ((1.0 - float(newSize) / float(oldSize)) * 100.0)
    else:
        perc = "- "
    print("%-26s%16s: %8d  %8d  %6s" % (title, nameKey, oldSize, newSize, perc))

def compareSizesOfFile(oldFiles, newFiles, allSections, listCategories):
    """Print a size comparison between a set of old and a set of new files.

    The sizes of all files in each list are summed up before comparing.

    Arguments:
        oldFiles, newFiles: lists of file paths.
        allSections: if true, also print a row for each additional section.
        listCategories: if true, also print a row for each function category.
    """
    oldSizes = collections.defaultdict(int)
    newSizes = collections.defaultdict(int)
    for oldFile in oldFiles:
        readSizes(oldSizes, oldFile, listCategories, True)
    for newFile in newFiles:
        readSizes(newSizes, newFile, listCategories, True)

    if len(oldFiles) == 1 and len(newFiles) == 1:
        oldBase = os.path.basename(oldFiles[0])
        newBase = os.path.basename(newFiles[0])
        title = oldBase
        if oldBase != newBase:
            title += "-" + newBase
    else:
        title = "old-new"

    compareSizes(oldSizes, newSizes, "__text", title)

    if listCategories:
        categories = (sorted(Prefixes.values()) + sorted(Infixes.values()) +
                      ["Unknown"])
        # Several prefixes map to the same category; as the names are
        # sorted, duplicates are adjacent and can be skipped by comparing
        # with the previous name.
        prev = None
        for categoryName in categories:
            if categoryName != prev:
                compareSizes(oldSizes, newSizes, categoryName, "")
            prev = categoryName

    if allSections:
        sectionTitle = "    section"
        # Each section is listed exactly once.
        sectionNames = (
            "__textcoal_nt",
            "__stubs",
            "__const",
            "__cstring",
            "__objc_methname",
            "__objc_const",
            "__data",
            "__swift1_proto",
            "__common",
            "__bss",
        )
        for sectionName in sectionNames:
            compareSizes(oldSizes, newSizes, sectionName, sectionTitle)

def listFunctionSizes(sizeArray):
    """Print "<size> <name>" lines, ordered by ascending size.

    Arguments:
        sizeArray: iterable of (name, size) pairs.
    """
    for pair in sorted(sizeArray, key=itemgetter(1)):
        # A single parenthesized argument prints identically with the
        # Python 2 print statement and the Python 3 print function.
        print("%8d %s" % (pair[1], pair[0]))

def compareFunctionSizes(oldFiles, newFiles):
    """Print a per-function size comparison of old and new files.

    Prints the functions which only exist in the old files, the functions
    which only exist in the new files, and - sorted by size difference - the
    functions which exist in both, each list followed by size totals.

    Arguments:
        oldFiles, newFiles: lists of file paths.
    """
    oldSizes = collections.defaultdict(int)
    newSizes = collections.defaultdict(int)
    for name in oldFiles:
        readSizes(oldSizes, name, True, False)
    for name in newFiles:
        readSizes(newSizes, name, True, False)

    onlyInFile1 = []
    onlyInFile2 = []
    inBoth = []

    onlyInFile1Size = 0
    onlyInFile2Size = 0

    # Decide by key membership and not by comparing the size with 0: a
    # function can legitimately have a recorded size of 0. Membership tests
    # also avoid inserting keys into the defaultdicts while iterating.
    for func, oldSize in oldSizes.items():
        if func in newSizes:
            inBoth.append((func, oldSize, newSizes[func]))
        else:
            onlyInFile1.append((func, oldSize))
            onlyInFile1Size += oldSize

    for func, newSize in newSizes.items():
        if func not in oldSizes:
            onlyInFile2.append((func, newSize))
            onlyInFile2Size += newSize

    if onlyInFile1:
        print("Only in old file(s)")
        listFunctionSizes(onlyInFile1)
        print("Total size of functions only in old file: {}".format(onlyInFile1Size))
        print("")

    if onlyInFile2:
        print("Only in new file(s)")
        listFunctionSizes(onlyInFile2)
        print("Total size of functions only in new file: {}".format(onlyInFile2Size))
        print("")

    if inBoth:
        inBothSize = 0
        sizeIncrease = 0
        sizeDecrease = 0
        print("%8s %8s %8s" % ("old", "new", "diff"))
        # Order by size difference first, then by old size.
        for func, oldSize, newSize in sorted(
                inBoth, key=lambda tup: (tup[2] - tup[1], tup[1])):
            diff = newSize - oldSize
            if diff > 0:
                sizeIncrease += diff
            elif diff < 0:
                sizeDecrease -= diff
            else:
                inBothSize += newSize
            print("%8d %8d %8d %s" % (oldSize, newSize, diff, func))
        print("Total size of functions with the same size in both files: {}".format(inBothSize))
        print("Total size of functions that got smaller: {}".format(sizeDecrease))
        print("Total size of functions that got bigger: {}".format(sizeIncrease))
        print("Total size change of functions present in both files: {}".format(sizeIncrease - sizeDecrease))

def main():
    """Parse the command line and run the requested size comparison.

    Options and file arguments are read from sys.argv, the build
    directories from the SWIFT_OLD_BUILDDIR and SWIFT_NEW_BUILDDIR
    environment variables. Usage errors terminate the program through
    sys.exit() with an error message.
    """
    allSections = False
    listCategories = False
    listFunctions = False
    separatorFound = False
    sumSizes = False
    oldFileArgs = []
    newFileArgs = []
    # File arguments are collected as "old" files until "--" is seen.
    curFiles = oldFileArgs
    for arg in sys.argv[1:]:
        if arg == "-a":
            allSections = True
        elif arg == "-c":
            listCategories = True
        elif arg == "-s":
            sumSizes = True
        elif arg == "-l":
            listFunctions = True
        elif arg == "--":
            curFiles = newFileArgs
            separatorFound = True
        elif arg == "-h":
            help()
            return
        elif arg.startswith("-"):
            sys.exit("Unknown option. Use -h to display usage.")
        else:
            curFiles.append(arg)

    oldBuildDir = os.environ.get("SWIFT_OLD_BUILDDIR")
    newBuildDir = os.environ.get("SWIFT_NEW_BUILDDIR")

    if separatorFound:
        oldFiles = oldFileArgs
        newFiles = newFileArgs
    else:
        if not oldFileArgs:
            if listFunctions:
                sys.exit("Must specify file for the -l option")
            if not oldBuildDir:
                sys.exit("$SWIFT_OLD_BUILDDIR not specified")
            if not newBuildDir:
                sys.exit("$SWIFT_NEW_BUILDDIR not specified")
            oldFileArgs = ["O", "Ounchecked", "Onone", "dylib"]

        shortcuts = {
            "O"          : "bin/PerfTests_O",
            "Ounchecked" : "bin/PerfTests_Ounchecked",
            "Onone"      : "bin/PerfTests_Onone",
            "dylib"      : "lib/swift/macosx/x86_64/libswiftCore.dylib"
        }
        oldFiles = []
        newFiles = []
        numExpanded = 0
        for fileArg in oldFileArgs:
            pattern = shortcuts.get(fileArg, fileArg)

            # Arguments are interpreted relative to the build-dirs, unless
            # they start with "./" or the build-dirs are not both set.
            if not pattern.startswith("./") and oldBuildDir and newBuildDir:
                oldExpanded = glob.glob(oldBuildDir + "/" + pattern)
                newExpanded = glob.glob(newBuildDir + "/" + pattern)
                if oldExpanded and newExpanded:
                    oldFiles.extend(oldExpanded)
                    newFiles.extend(newExpanded)
                    numExpanded += 1

        if numExpanded != 0 and numExpanded != len(oldFileArgs):
            sys.exit("mix of expanded/not-expanded arguments")
        if numExpanded == 0:
            # Nothing was found in the build-dirs: treat the arguments as
            # one old file, optionally followed by one new file.
            if len(oldFileArgs) > 2:
                sys.exit("too many arguments")
            oldFiles = oldFileArgs[0:1]
            newFiles = oldFileArgs[1:2]

    for path in (oldFiles + newFiles):
        if not os.path.isfile(path):
            sys.exit("file " + path + " not found")

    if listFunctions:
        if allSections or listCategories:
            sys.stderr.write("Warning: options -a and -c ignored when using -l\n")
        if not newFiles:
            sizes = collections.defaultdict(int)
            for path in oldFiles:
                readSizes(sizes, path, True, False)
            listFunctionSizes(sizes.items())
        else:
            compareFunctionSizes(oldFiles, newFiles)
    else:
        print("%-26s%16s  %8s  %8s  %s" % ("", "Section", "Old", "New", "Percent"))
        if sumSizes:
            compareSizesOfFile(oldFiles, newFiles, allSections, listCategories)
        else:
            if len(oldFiles) != len(newFiles):
                sys.exit("number of new files must be the same of old files")

            # Sort both lists so that the files are paired up by name and
            # not by the order in which glob happened to return them.
            oldFiles.sort()
            newFiles.sort()

            for idx, oldFile in enumerate(oldFiles):
                newFile = newFiles[idx]
                compareSizesOfFile([oldFile], [newFile], allSections, listCategories)

# Script entry point. The call is unconditional (there is no
# `if __name__ == "__main__"` guard), so importing this file runs it as well.
main()

